########################################################
# Code to preprocess elections data 
# Date created: 12/5/2019
# Date last modified: 7/20/2021
#########################################################
  library(tidyverse)
  library(reshape2)
  library(foreign)
  library(readxl)
  library(plm)
  # note that we calculate incumbent shares in the first step for dmr
  # only,because they're linked to the districts, which change over time

  #######
  #2003
  #######
  #Jefe Delegacional 
  jd03_cas <- read.dbf("chapter_5_rewarding_responsiveness/elections_raw/jd03_cas.DBF")
  jd03_cas$CASILLA <- NULL
  jd03_cas$OBSERVACIO <- NULL
  jd03_secc <- jd03_cas %>% group_by(SECCION, DTO_LOC, CVE_DEL, NOM_DEL) %>% summarise_all(funs(sum))
  colnames(jd03_secc)[5:29] <- paste("jd", colnames(jd03_secc)[5:29], sep = "_")
  
  #dmr
  dmr03_cas <- read.dbf("chapter_5_rewarding_responsiveness/elections_raw/dmr03_cas.DBF")
  dmr03_cas$CASILLA <- NULL
  dmr03_cas$OBSERVACIO <- NULL
  dmr03_secc <- dmr03_cas %>% group_by(SECCION, DTO_LOC, CVE_DEL, NOM_DEL) %>% summarise_all(funs(sum))
  colnames(dmr03_secc)[5:22] <- paste("dmr", colnames(dmr03_secc)[5:22], sep = "_")
  
  #drp 
  drp03_cas <- read.dbf("chapter_5_rewarding_responsiveness/elections_raw/drp03_cas.DBF")
  drp03_cas$CASILLA <- NULL
  drp03_cas$OBSERVACIO <-NULL
  drp03_secc <- drp03_cas %>% group_by(SECCION, DTO_LOC, CVE_DEL, NOM_DEL) %>% summarise_all(funs(sum))
  colnames(drp03_secc)[5:18] <- paste("drp", colnames(drp03_secc)[5:18], sep = "_")
  
  elections_2003 <- left_join(dmr03_secc, drp03_secc, by = c("SECCION", "DTO_LOC", "CVE_DEL", "NOM_DEL")) %>% left_join(., jd03_secc, by=c("SECCION", "DTO_LOC", "CVE_DEL", "NOM_DEL"))
  elections_2003$year <- 2003
  colnames(elections_2003[1]) <- "SECC"
  elections_2003 <- ungroup(elections_2003)
  elections_2003 <- elections_2003 %>% mutate_if(is.integer, funs(replace(., is.na(.), 0)))
  colnames(elections_2003)[3] <- "DEL"
  elections_2003$DEL <- as.numeric(elections_2003$DEL)
  elections_2003$jd_valid_votes <- rowSums(elections_2003[, c("jd_PAN", "jd_PRI", "jd_PRD",  "jd_PT","jd_PVEM", "jd_CONVERG","jd_PSN", "jd_PAS",
                                                              "jd_MP", "jd_PLM","jd_FC", "jd_CC_01", "jd_CC_02", "jd_CC_03","jd_CC_04","jd_CC_05", "jd_CC_06"  ,"jd_CC_07","jd_CC_08", "jd_CC_09", "jd_CC_10" ,"jd_CC_11")], na.rm = TRUE)
  elections_2003$jd_turnout <- (rowSums(elections_2003[, c("jd_valid_votes", "jd_V_NUL", "jd_V_BLAN")], na.rm = TRUE))/elections_2003$jd_LIS_NOM
  
  elections_2003$dmr_valid_votes <- rowSums(elections_2003[, c("dmr_PAN", "dmr_PRI","dmr_PRD", "dmr_PT", "dmr_PVEM","dmr_CONVERG", "dmr_PSN",  "dmr_PAS" , "dmr_MP",
                                                               "dmr_PLM", "dmr_FC" , "dmr_CC_01", "dmr_CC_02", "dmr_CC_03","dmr_CC_04")])
  elections_2003$dmr_turnout <- rowSums(elections_2003[, c("dmr_valid_votes", "dmr_V_NUL", "dmr_V_BLAN")], na.rm = TRUE)/elections_2003$dmr_LIS_NOM
  
  elections_2003$drp_valid_votes <- rowSums(elections_2003[, c( "drp_PAN",  "drp_PRI" , "drp_PRD", "drp_PT",  "drp_PVEM", "drp_CONVERG", "drp_PSN",
                                                                "drp_PAS", "drp_MP", "drp_PLM", "drp_FC")])
  
  #Candidaturas-Comunes:
  #CC01: PRI-PVEM-FC (Alvaro Obregon) CC02: PT-CONV-FC (Azcapotzalco) CC03: PRI-FC (Coyoacan) CC04: PRI-PVEM (Cuajimalpa) CC05: PRI-PVEM (Cuauhtemoc)
  #CC06: PT-PVEM-CONVERGENCIA (GAM) CC07: Iztacalco CC08 (the two iztacalco candidaturas are indistinguishable but neither includes a major party )Iztacalco 
  #CC09: PRI-PVEM (Iztapalapa) CC10: Alianza Social Convergencia (magdalena contreras) #CC11: PRI-FC (Miguel Hidalgo)
  
  #jefe delegacional vote shares 
  elections_2003$vspri_jd <- rowSums(elections_2003[, c("jd_PRI","jd_CC_01", "jd_CC_03", "jd_CC_04", "jd_CC_05", "jd_CC_09", "jd_CC_11")],na.rm = TRUE)/elections_2003$jd_valid_votes
  elections_2003$vspt_jd <- rowSums(elections_2003[, c("jd_CC_02", "jd_CC_06", "jd_PT")],na.rm = TRUE)/elections_2003$jd_valid_votes
  elections_2003$vspan_jd <- elections_2003$jd_PAN/elections_2003$jd_valid_votes
  elections_2003$vsprd_jd <- elections_2003$jd_PRD/elections_2003$jd_valid_votes
  elections_2003$vsmorena_jd <-NA

  #dmr vote shares 
  elections_2003$vspri_dmr <- rowSums(elections_2003[, c("dmr_PRI","dmr_CC_01", "dmr_CC_03", "dmr_CC_04")],na.rm = TRUE)/elections_2003$dmr_valid_votes
  elections_2003$vspt_dmr <- rowSums(elections_2003[, c("dmr_CC_02", "jd_PT")],na.rm = TRUE)/elections_2003$dmr_valid_votes
  elections_2003$vspan_dmr <- elections_2003$dmr_PAN/elections_2003$dmr_valid_votes
  elections_2003$vsprd_dmr <- elections_2003$dmr_PRD/elections_2003$dmr_valid_votes
  elections_2003$vsmorena_dmr <-NA
  
  #drp vote shares 
  elections_2003$vspri_drp <- elections_2003$drp_PRD/elections_2003$drp_valid_votes
  elections_2003$vspt_drp <- elections_2003$drp_PT/elections_2003$drp_valid_votes
  elections_2003$vspan_drp <- elections_2003$drp_PAN/elections_2003$drp_valid_votes
  elections_2003$vsprd_drp <- elections_2003$drp_PRD/elections_2003$drp_valid_votes
  elections_2003$vsmorena_drp <-NA
  
  # read in aldf first past the post (dmr) incumbents.We'll use these as the
  # incumbents in 2006, but it's important to load it in here because district
  # might change and you want to associate the seccion with who was their
  # incumbent
  
  elections_2003 <- elections_2003 %>% mutate(DTO_LOC= as.character(DTO_LOC))
  dmr_2003 <- read_csv("chapter_5_rewarding_responsiveness/elections_raw/dmr_2003_outcomes.csv") %>% 
    select(Distrito,Partido) %>% 
    dplyr::rename(DTO_LOC= Distrito, winner_dmr=Partido)
  elections_2003 <- left_join(elections_2003,dmr_2003, by = "DTO_LOC")
  
  #combining all types
  elections_2003 <- elections_2003[,c("SECCION", "DEL", "NOM_DEL", "year", "vspri_jd", "vspt_jd", "vspan_jd", "vsprd_jd", "vsmorena_jd", "vspri_dmr","vspt_dmr", "vspan_dmr", "vsprd_dmr", "vsmorena_dmr","vspri_drp", "vspt_drp", "vspan_drp", "vsprd_drp", "vsmorena_drp","winner_dmr", "dmr_valid_votes", "jd_valid_votes", "jd_turnout", "dmr_turnout")]
  rm(dmr03_cas, dmr03_secc, drp03_cas, drp03_secc, jd03_cas, jd03_secc,dmr_2003)
  elections_2003 <- elections_2003 %>% rename(SECC=SECCION, DEL_NAME=NOM_DEL)
  elections_2003$SECC<- as.character(elections_2003$SECC)

  ########
  #2006
  ########
  #jefe de gobierno
  jg06_secc <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/jg06_secc.xls", skip = 1)
  colnames(jg06_secc)[4:12] <- paste("jg", colnames(jg06_secc)[4:12], sep = "_")
  colnames(jg06_secc)[3] <- "SECC"
  
  #dmr
  dmr06_secc <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/dmr06_secc.xls", skip = 1)
  colnames(dmr06_secc)[4:12] <- paste("dmr", colnames(dmr06_secc)[4:12], sep = "_")
  colnames(dmr06_secc)[3] <- "SECC"
  
  #drp 
  drp06_secc <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/drp06_secc.xls", skip = 1)
  colnames(drp06_secc)[4:12] <- paste("drp", colnames(drp06_secc)[4:12], sep = "_")
  colnames(drp06_secc)[3] <- "SECC"
  
  #jd
  jd06_secc <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/jd06_secc.xls", skip = 1)
  colnames(jd06_secc)[4:12] <- paste("jd", colnames(jd06_secc)[4:12], sep = "_")
  colnames(jd06_secc)[3] <- "SECC"
  
  #combining all types
  elections_2006 <- list(dmr06_secc, drp06_secc, jd06_secc, jg06_secc) %>% reduce(left_join, by = c("SECC", "DISTRITO", "DELEGACIÓN"))
  elections_2006$year <- 2006
  elections_2006$DEL <- factor(elections_2006$DELEGACIÓN, levels = c("ALVARO OBREGÓN", "AZCAPOTZALCO","BENITO JUÁREZ", "COYOACÁN", "CUAJIMALPA", 
                                                                     "CUAUHTÉMOC","GUSTAVO A. MADERO","IZTACALCO","IZTAPALAPA", "MAGDALENA CONTRERAS",
                                                                     "MIGUEL HIDALGO", "MILPA ALTA", "TLÁHUAC", "TLALPAN", "VENUSTIANO CARRANZA", "XOCHIMILCO"), 
                               labels =c("10", "2", "14", "3", "4", "15", "5", "6", "7", "8", "16", "9", "11", "12", "17", "13"))
  elections_2006$DEL <- as.numeric(as.character(elections_2006$DEL))
  elections_2006 <- ungroup(elections_2006)
  elections_2006 <- elections_2006 %>% mutate_if(is.integer, funs(replace(., is.na(.), 0)))
  #note: in 2006, jd_NA_PAN is the sum of PAN and NA. I replace it as null because there are no "extra" votes just for the CC
  elections_2006$`jd_Candidatura común PAN-NUEVA ALIANZA` <- NULL
  elections_2006$jd_valid_votes <- rowSums(elections_2006[,c("jd_PAN", "jd_PRI-PVEM" ,"jd_PRD-PT-CONV.",  "jd_NUEVA ALIANZA",                    
                                                             "jd_PASC")], na.rm=TRUE)
   elections_2006$jd_turnout <- rowSums(elections_2006[,c("jd_valid_votes", "jd_VOTOS NULOS")], na.rm=TRUE)/elections_2006$`jd_LISTA NOMINAL`
  elections_2006$jg_valid_votes <- rowSums(elections_2006[,c( "jg_PAN","jg_PRI-PVEM" , "jg_PRD-PT-CONV." ,"jg_NUEVA ALIANZA", "jg_PASC")], na.rm=TRUE)
  
  elections_2006$dmr_valid_votes <- rowSums(elections_2006[,c("dmr_PAN", "dmr_PRI-PVEM", "dmr_PRD-PT-CONV.", "dmr_NUEVA ALIANZA", "dmr_PASC" )], na.rm=TRUE)
  elections_2006$dmr_turnout <- rowSums(elections_2006[,c("dmr_valid_votes", "dmr_VOTOS NULOS")], na.rm=TRUE)/elections_2006$`dmr_LISTA NOMINAL`
  
  # check if valid votes is correct
  elections_2006$correct_jd <- ifelse(elections_2006$`VOTACIÓN TOTAL` == elections_2006$jd_valid_votes + elections_2006$`jd_VOTOS NULOS`,1,0)
  # check if valid votes < lista nominal
  # there is one district where something is clearly wrong with the data (Votación total > Lista Nominal; replace it with NA)
  elections_2006$off <-ifelse(elections_2006$`VOTACIÓN TOTAL` >elections_2006$`jd_LISTA NOMINAL`,1,0| elections_2006$`dmr_VOTACIÓN TOTAL`> elections_2006$`dmr_LISTA NOMINAL`)
  elections_2006[elections_2006$off ==1,]$dmr_turnout  <-NA
  elections_2006[elections_2006$off ==1,]$jd_turnout  <-NA
  
  elections_2006$drp_valid_votes <- rowSums(elections_2006[,c("drp_PAN", "drp_PRI-PVEM", "drp_PRD-PT-CONV.", "drp_NUEVA ALIANZA", "drp_PASC")], na.rm=TRUE)
  
  #calculating vote share for major parties 
  #jefe delegacional vote shares 
  elections_2006$vspri_jd <- elections_2006$'jd_PRI-PVEM'/elections_2006$jd_valid_votes
  elections_2006$vspt_jd <- elections_2006$'jd_PRD-PT-CONV.'/elections_2006$jd_valid_votes
  elections_2006$vspan_jd <-elections_2006$jd_PAN/elections_2006$jd_valid_votes
  elections_2006$vsprd_jd <- elections_2006$'jd_PRD-PT-CONV.'/elections_2006$jd_valid_votes
  elections_2006$vsmorena_jd <- NA
  
  #jefe de gobierno vote shares 
  elections_2006$vspan_jg <- elections_2006$jg_PAN/elections_2006$jg_valid_votes
  elections_2006$vspt_jg <- elections_2006$'jg_PRD-PT-CONV.'/elections_2006$jg_valid_votes
  elections_2006$vspri_jg <- elections_2006$'jg_PRI-PVEM'/elections_2006$jg_valid_votes
  elections_2006$vsprd_jg <-elections_2006$'jg_PRD-PT-CONV.'/elections_2006$jg_valid_votes
  elections_2006$vsmorena_jg <- NA
  
  #dmr vote shares
  elections_2006$vspri_dmr <- elections_2006$'dmr_PRI-PVEM'/elections_2006$dmr_valid_votes
  elections_2006$vspt_dmr <- elections_2006$'dmr_PRD-PT-CONV.'/elections_2006$dmr_valid_votes
  elections_2006$vspan_dmr <-elections_2006$dmr_PAN/elections_2006$dmr_valid_votes
  elections_2006$vsprd_dmr <- elections_2006$'dmr_PRD-PT-CONV.'/elections_2006$dmr_valid_votes
  elections_2006$vsmorena_dmr <- NA
  
  #drp vote shares
  elections_2006$vspri_drp <- elections_2006$'drp_PRI-PVEM'/elections_2006$drp_valid_votes
  elections_2006$vspt_drp <- elections_2006$'drp_PRD-PT-CONV.'/elections_2006$drp_valid_votes
  elections_2006$vspan_drp <-elections_2006$drp_PAN/elections_2006$drp_valid_votes
  elections_2006$vsprd_drp <- elections_2006$'drp_PRD-PT-CONV.'/elections_2006$drp_valid_votes
  elections_2006$vsmorena_drp <- NA
  
  # read in aldf first past the post (dmr) incumbents.
  dmr_2006 <- read_csv("chapter_5_rewarding_responsiveness/elections_raw/dmr_2006_outcomes.csv") %>%
    select(Distrito, Partido) %>% 
    rename(DISTRITO = Distrito, winner_dmr=Partido)
   dmr_2006$DISTRITO <- str_replace(string=dmr_2006$DISTRITO,replacement="",pattern="Distrito ")
  elections_2006 <-left_join(elections_2006,dmr_2006,by="DISTRITO")
  
  elections_2006 <- elections_2006[,c("DELEGACIÓN", "SECC", "year", "DEL", "vspri_jd", "vspt_jd", "vspan_jd", "vsprd_jd", "vsmorena_jd", "vspri_jg","vspt_jg", "vspan_jg",  "vsprd_jg", "vsmorena_jg", "vspri_dmr","vspt_dmr", "vspan_dmr",  "vsprd_dmr", "vsmorena_dmr",  "vspri_drp","vspt_drp", "vspan_drp", "vsprd_drp", "vsmorena_drp", "winner_dmr", "dmr_valid_votes", "jd_valid_votes", "jg_valid_votes", "jd_turnout", "dmr_turnout")]
  rm(dmr06_secc, drp06_secc, jd06_secc, jg06_secc, dmr_2006)
  elections_2006 <- elections_2006 %>% dplyr::rename(DEL_NAME = DELEGACIÓN)
  elections_2006$SECC<- as.character(elections_2006$SECC)
  
  ##########
  #2009
  ##########
  
  #dmr
  dmr09_secc <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/dmr09_secc.xls",skip = 9, n_max = 5526)
  colnames(dmr09_secc)[4:18] <- paste("dmr", colnames(dmr09_secc)[4:18], sep = "_")
  colnames(dmr09_secc)[3] <- "SECC"
  
  #drp 
  drp09_secc <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/drp09_secc.xls",skip = 7, n_max = 5526)
  colnames(drp09_secc)[4:15] <- paste("drp", colnames(drp09_secc)[4:15], sep = "_")
  colnames(drp09_secc)[3] <- "SECC"
  
  #jd
  jd09_secc <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/jd09_secc.xls", skip = 8, n_max = 5526)
  colnames(jd09_secc)[4:19] <- paste("jd", colnames(jd09_secc)[4:19], sep = "_")
  colnames(jd09_secc)[3] <- "SECC"
  
  #combining all types
  elections_2009 <- list(dmr09_secc, drp09_secc, jd09_secc) %>% reduce(left_join, by = c("SECC", "Distrito", "Delegación"))
  elections_2009$year <- 2009
  
  #note- the candidaturas comunes totals parties and votes for the original
  #vote- the code below fixes this by replacing these totals with ONLY the
  #common candidacy votes
  elections_2009$`jd_Candidatura Común PRD_Convergencia` <- ifelse(elections_2009$`jd_Candidatura Común PRD_Convergencia` >0, elections_2009$`jd_Candidato Común`, 0)
  elections_2009$`jd_Candidatura Común PRD_PT_Convergencia` <- ifelse(elections_2009$`jd_Candidatura Común PRD_PT_Convergencia` >0, elections_2009$`jd_Candidato Común`, 0)
  elections_2009$`jd_Candidatura Común PRD_PT` <-ifelse(elections_2009$`jd_Candidatura Común PRD_PT` >0, elections_2009$`jd_Candidato Común`, 0)
  elections_2009$`jd_Candidatura Común PT_Convergencia` <- ifelse(elections_2009$`jd_Candidatura Común PT_Convergencia` >0, elections_2009$`jd_Candidato Común`, 0)
  
  elections_2009$`dmr_Candidatura Común PRD_PT_Convergencia` <- ifelse(elections_2009$`dmr_Candidatura Común PRD_PT_Convergencia` >0, elections_2009$`dmr_Candidato Común`, 0)
  elections_2009$`dmr_Candidatura Común PRD_Convergencia` <- ifelse(elections_2009$`dmr_Candidatura Común PRD_Convergencia` >0, elections_2009$`dmr_Candidato Común`, 0)
  
  elections_2009 <- elections_2009 %>% mutate_if(is.integer, funs(replace(., is.na(.), 0)))
  elections_2009$DEL <- factor(elections_2009$Delegación, levels = c("Álvaro Obregón","Azcapotzalco","Benito Juárez", "Coyoacán", "Cuajimalpa de Morelos",
                                                                     "Cuauhtémoc","Gustavo A. Madero","Iztacalco", "Iztapalapa", "La Magdalena Contreras",
                                                                     "Miguel Hidalgo","Milpa Alta", "Tláhuac", "Tlalpan", "Venustiano Carranza", "Xochimilco"), 
                               labels =c("10", "2", "14", "3", "4", "15", "5", "6", "7", "8", "16", "9", "11", "12", "17", "13"))
  elections_2009$DEL <- as.numeric(as.character(elections_2009$DEL))
  
  
  #note- we can check that this is right given that it equals votacion total - votos nulos 
  elections_2009$jd_valid_votes <- rowSums(elections_2009[,c(  "jd_PAN" , "jd_PRI" ,  "jd_PRD" ,  "jd_PT" , "jd_PVEM" , "jd_Convergencia" ,  "jd_Nueva Alianza" ,  "jd_PSD" , "jd_Candidato Común")])
  elections_2009$correct_jd <- ifelse(elections_2009$`Votación Total` == elections_2009$jd_valid_votes + elections_2009$`jd_Votos Nulos`,1,0)
  elections_2009$jd_turnout <-(elections_2009$jd_valid_votes + elections_2009$`jd_Votos Nulos`)/elections_2009$`jd_Lista Nominal`
  
  elections_2009$dmr_valid_votes <- rowSums(elections_2009[,c("dmr_PAN","dmr_PRI","dmr_PRD", "dmr_PT","dmr_PVEM", "dmr_Convergencia" , "dmr_Nueva Alianza", "dmr_PSD",
                                                              "dmr_Candidatura Común PRD_PT_Convergencia", "dmr_Candidatura Común PRD_Convergencia")])
  elections_2009$correct_dmr <- ifelse(elections_2009$`dmr_Votación Total` == elections_2009$dmr_valid_votes + elections_2009$`dmr_Votos Nulos`,1,0)
  elections_2009$dmr_turnout <- elections_2009$`Votación Total`/elections_2009$`dmr_Lista Nominal`
  
  
  elections_2009$drp_valid_votes <- rowSums(elections_2009[,c( "drp_PAN" ,"drp_PRI", "drp_PRD" , "drp_PVEM" ,  "drp_Nueva Alianza", "drp_Convergencia","drp_PSD")])
  
  
  #jefe delegacional vote shares 
  elections_2009$vspri_jd <- elections_2009$jd_PRI/elections_2009$jd_valid_votes
  elections_2009$vspt_jd <- rowSums(elections_2009[, c("jd_PT","jd_Candidatura Común PRD_PT_Convergencia","jd_Candidatura Común PRD_PT", "jd_Candidatura Común PT_Convergencia" )],na.rm = TRUE)/elections_2009$jd_valid_votes
  elections_2009$vspan_jd <- elections_2009$jd_PAN/elections_2009$jd_valid_votes
  elections_2009$vsprd_jd <- rowSums(elections_2009[, c("jd_PRD","jd_Candidatura Común PRD_PT_Convergencia", "jd_Candidatura Común PRD_PT")],na.rm = TRUE)/elections_2009$jd_valid_votes
  elections_2009$vsmorena_jd <-NA
  
  # dmr vote shares 
  elections_2009$vspri_dmr <- elections_2009$dmr_PRI/elections_2009$dmr_valid_votes
  elections_2009$vspt_dmr <- rowSums(elections_2009[, c("dmr_PT", "dmr_Candidatura Común PRD_PT_Convergencia")],na.rm = TRUE)/elections_2009$dmr_valid_votes
  elections_2009$vspan_dmr <- elections_2009$dmr_PAN/elections_2009$dmr_valid_votes
  elections_2009$vsprd_dmr <- rowSums(elections_2009[, c("dmr_PRD","dmr_Candidatura Común PRD_PT_Convergencia", "dmr_Candidatura Común PRD_Convergencia")],na.rm = TRUE)/elections_2009$dmr_valid_votes
  elections_2009$vsmorena_dmr <-NA
  
  # drp vote shares 
  elections_2009$vspri_drp <- elections_2009$drp_PRI/elections_2009$drp_valid_votes
  elections_2009$vspt_drp <- elections_2009$drp_PT/elections_2009$drp_valid_votes
  elections_2009$vspan_drp <- elections_2009$drp_PAN/elections_2009$drp_valid_votes
  elections_2009$vsprd_drp <-elections_2009$drp_PRD/elections_2009$drp_valid_votes
  elections_2009$vsmorena_drp <-NA
  
  # read in aldf fpp (dmr)
  dmr_2009 <-read_csv("chapter_5_rewarding_responsiveness/elections_raw/dmr_2009_outcomes.csv") %>% 
    select(Distrito, Partido) %>% 
    rename(winner_dmr = Partido)
  
  elections_2009<- left_join(elections_2009, dmr_2009,by="Distrito")
  
  elections_2009 <- elections_2009[,c("SECC", "Delegación", "DEL", "year", "vspri_jd", "vspt_jd", "vspan_jd", "vsprd_jd", "vsmorena_jd", "vspri_dmr","vspt_dmr", "vspan_dmr", "vsprd_dmr", "vsmorena_dmr", "vspri_drp", "vspt_drp","vspan_drp", "vsprd_drp", "vsmorena_drp", "winner_dmr", "dmr_valid_votes", "jd_valid_votes", "jd_turnout", "dmr_turnout")]
  rm( dmr09_secc,  drp09_secc,  jd09_secc,dmr_2009)
  elections_2009 <- elections_2009 %>% rename(DEL_NAME = Delegación)
  elections_2009$SECC<- as.character(elections_2009$SECC)
  
  ##########
  #2012
  ##########
  #jg
  jg12_secc <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/jg12_secc.xlsx",  skip = 1, n_max = 5528)
  colnames(jg12_secc)[5:16] <- paste("jg", colnames(jg12_secc)[5:16], sep = "_")
  colnames(jg12_secc)[2] <- "CLAVE DELEGACIÓN"
  jg12_secc[jg12_secc$DELEGACIÓN == "COYOACÁN","DELEGACIÓN"] <- "COYOACAN"
  jg12_secc <- jg12_secc %>% select(-c("DISTRITO","CLAVE DELEGACIÓN","DELEGACIÓN" ))
  
  #dmr
  dmr12_secc <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/dmr12_secc.xlsx",skip = 1, n_max = 5526)
  colnames(dmr12_secc)[5:17] <- paste("dmr", colnames(dmr12_secc)[5:17], sep = "_")
  dmr12_secc <- dmr12_secc %>%  select(-c("DISTRITO", "CLAVE DE\r\nDELEGACIÓN","DELEGACIÓN"))
  
  #drp
  drp12_secc <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/drp12_secc.xlsx",skip = 1, n_max = 5526)
  drp12_secc$CASILLA <- NULL
  colnames(drp12_secc)[5:15] <- paste("drp", colnames(drp12_secc)[5:15], sep = "_")
  drp12_secc <- drp12_secc %>% select(-c("DISTRITO", "CLAVE DE\r\nDELEGACIÓN","DELEGACIÓN"))
  
  #jd
  jd12_secc <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/jd12_secc.xlsx",skip = 1, n_max = 5527)
  colnames(jd12_secc)[5:17] <- paste("jd", colnames(jd12_secc)[5:17], sep = "_")
  colnames(jd12_secc)[2] <- "CLAVE DELEGACIÓN"

  #combining all types
  elections_2012 <- list(dmr12_secc, drp12_secc, jd12_secc, jg12_secc) %>% reduce(left_join, by = c("SECCIÓN"))
  elections_2012$year <- 2012
  elections_2012 <- ungroup(elections_2012)
  
  elections_2012 <- elections_2012 %>% mutate_if(is.integer, funs(replace(., is.na(.), 0)))
  elections_2012$jd_valid_votes <- rowSums(elections_2012[,c("jd_PAN",  "jd_PRI", "jd_PRD", "jd_PT", "jd_PVEM" , "jd_MC", "jd_NA"  , "jd_PRI-PVEM CC1",  "jd_PRD-PT-MC CC2"  )], na.rm= TRUE)
  elections_2012$jd_turnout <- rowSums(elections_2012[,c("jd_valid_votes","jd_VOTOS\r\nNULOS" )], na.rm= TRUE)/elections_2012$`jd_LISTA NOMINAL`
  
  elections_2012$jg_valid_votes <- rowSums(elections_2012[,c("jg_PAN", "jg_PRI" , "jg_PRD" , "jg_PT" , "jg_PVEM" , "jg_MC", "jg_NA" ,"jg_PRI-PVEM\r\nCC1", "jg_PRD-PT-MC\r\nCC2")], na.rm=TRUE)
  elections_2012$dmr_valid_votes <- rowSums(elections_2012[,c(  "dmr_PAN"  , "dmr_PRI" , "dmr_PRD" ,"dmr_PT", "dmr_PVEM", "dmr_MC", "dmr_NA", "dmr_PRI-PVEM CC1", "dmr_PRD-PT-MC CC2")], na.rm=TRUE)
  elections_2012$dmr_turnout <- rowSums(elections_2012[,c("dmr_valid_votes","dmr_VOTOS\r\nNULOS" )], na.rm= TRUE)/elections_2012$`dmr_LISTA NOMINAL`
  
   # check if valid votes < lista nominal
  elections_2012$off <-ifelse(elections_2012$`jd_VOTACION\r\nTOTAL` >elections_2012$`jd_LISTA NOMINAL`,1,0| elections_2012$`dmr_VOTACION\r\nTOTAL`> elections_2012$`dmr_LISTA NOMINAL`)
  # there are two district where something is clearly wrong with the data (Votación total > Lista Nominal; replace it with NA)
  elections_2012$dmr_turnout <-  ifelse(elections_2012$off ==1, NA, elections_2012$dmr_turnout)
  elections_2012$jd_turnout <-  ifelse(elections_2012$off ==1, NA, elections_2012$jd_turnout)
  
  

  elections_2012$drp_valid_votes <- rowSums(elections_2012[,c( "drp_PAN" ,"drp_PRI", "drp_PRD" , "drp_PT" , "drp_PVEM", "drp_MC", "drp_NA")], na.rm=TRUE)
  
  #jefe delegacional vote shares 
  elections_2012$vspri_jd <-rowSums(elections_2012[, c("jd_PRI","jd_PRI-PVEM CC1")],na.rm = TRUE)/elections_2012$jd_valid_votes
  elections_2012$vspt_jd <-rowSums(elections_2012[, c("jd_PT", "jd_PRD-PT-MC CC2")],na.rm = TRUE)/elections_2012$jd_valid_votes
  elections_2012$vspan_jd <-elections_2012$jd_PAN/elections_2012$jd_valid_votes
  elections_2012$vsprd_jd <-rowSums(elections_2012[, c("jd_PRD","jd_PRD-PT-MC CC2")],na.rm = TRUE)/elections_2012$jd_valid_votes
  elections_2012$vsmorena_jd <- NA
  
  #jefe de gobierno vote shares 
  elections_2012$vspan_jg <- elections_2012$jg_PAN/elections_2012$jg_valid_votes
  elections_2012$vspri_jg <-rowSums(elections_2012[, c("jg_PRI","jg_PRI-PVEM\r\nCC1")],na.rm = TRUE)/elections_2012$jg_valid_votes
  elections_2012$vspt_jg <-rowSums(elections_2012[, c("jg_PT","jg_PRD-PT-MC\r\nCC2")],na.rm = TRUE)/elections_2012$jg_valid_votes
  elections_2012$vsprd_jg <-rowSums(elections_2012[, c("jg_PRD-PT-MC\r\nCC2")],na.rm = TRUE)/elections_2012$jg_valid_votes
  elections_2012$vsmorena_jg <- NA
  
  #dmr vote shares 
  elections_2012$vspan_dmr <- elections_2012$dmr_PAN/elections_2012$dmr_valid_votes
  elections_2012$vspri_dmr <-rowSums(elections_2012[, c("dmr_PRI","dmr_PRI-PVEM CC1")],na.rm = TRUE)/elections_2012$dmr_valid_votes
  elections_2012$vspt_dmr <-rowSums(elections_2012[, c("dmr_PT","dmr_PRD-PT-MC CC2")],na.rm = TRUE)/elections_2012$dmr_valid_votes
  elections_2012$vsprd_dmr <-rowSums(elections_2012[, c("dmr_PRD","dmr_PRD-PT-MC CC2" )],na.rm = TRUE)/elections_2012$dmr_valid_votes
  elections_2012$vsmorena_dmr <- NA
  
  #drp vote shares 
  elections_2012$vspan_drp <- elections_2012$drp_PAN/elections_2012$drp_valid_votes
  elections_2012$vspri_drp <-elections_2012$drp_PRI/elections_2012$drp_valid_votes
  elections_2012$vspt_drp <-elections_2012$drp_PT/elections_2012$drp_valid_votes
  elections_2012$vsprd_drp <-elections_2012$drp_PRD/elections_2012$drp_valid_votes
  elections_2012$vsmorena_drp <- NA
  
  #read in aldf fpp (dmr)
  dmr_2012 <- read_csv("chapter_5_rewarding_responsiveness/elections_raw/dmr_2012_outcomes.csv") %>% 
    select(Distrito,Partido) %>% 
    rename(DISTRITO =Distrito, winner_dmr = Partido)
  
  elections_2012 <- left_join(elections_2012, dmr_2012,by="DISTRITO")
  
  elections_2012 <- elections_2012[,c("CLAVE DELEGACIÓN", "SECCIÓN", "year", "DELEGACIÓN", "vspri_jd","vspt_jd", "vspan_jd", "vsprd_jd", "vsmorena_jd", "vspri_jg","vspt_jg", "vspan_jg", "vsprd_jg", "vsmorena_jg","vspri_dmr","vspt_dmr", "vspan_dmr", "vsprd_dmr", "vsmorena_dmr", "vspri_drp","vspt_drp","vspan_drp", "vsprd_drp", "vsmorena_drp","winner_dmr", "dmr_valid_votes", "jd_valid_votes", "jg_valid_votes", "jd_turnout", "dmr_turnout")]
  rm(dmr12_secc, drp12_secc, jd12_secc, jg12_secc, dmr_2012)
  
  elections_2012 <- elections_2012 %>%  rename(DEL = 'CLAVE DELEGACIÓN', DEL_NAME = DELEGACIÓN, SECC= SECCIÓN)
  elections_2012$SECC<- as.character(elections_2012$SECC)
  
  #########
  #2015
  #########
  #dmr
  dmr15_secc <- read.csv("chapter_5_rewarding_responsiveness/elections_raw/dmr15_secc.csv")
  colnames(dmr15_secc)[4:27] <- paste("dmr", colnames(dmr15_secc)[4:27], sep = "_")
  colnames(dmr15_secc)[3] <- "SECC"
  
  #drp 
  drp15_secc <- read.csv("chapter_5_rewarding_responsiveness/elections_raw/drp15_secc.csv")
  colnames(drp15_secc)[4:30] <- paste("drp", colnames(drp15_secc)[4:30], sep = "_")
  colnames(drp15_secc)[3] <- "SECC"
  
  #jd
  jd15_secc <- read.csv("chapter_5_rewarding_responsiveness/elections_raw/jd15_secc.csv")
  colnames(jd15_secc)[4:30] <- paste("jd", colnames(jd15_secc)[4:30], sep = "_")
  colnames(jd15_secc)[3] <- "SECC"
  
  #combining all types
  elections_2015 <- list(dmr15_secc, drp15_secc, jd15_secc) %>% reduce(left_join, by = c("DEL", "DTTO", "SECC"))
  elections_2015$year <- 2015
  elections_2015$DISTRITO <- factor(elections_2015$DTTO, levels = c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26, 27, 28, 29,30, 31, 32, 33, 34,35, 36, 37, 38, 39, 40),
                                    labels = c( "I", "II", "III",  "IV",  "V","VI", "VII", "VIII", "IX", "X", "XI","XII", "XIII","XIV","XV", "XVI", "XVII", "XVIII", "XIX","XX",  "XXI",   
                                                "XXII","XXIII", "XXIV","XXV","XXVI","XXVII","XXVIII","XXIX", "XXX","XXXI", "XXXII","XXXIII", "XXXIV", "XXXV", "XXXVI", "XXXVII", "XXXVIII", "XXXIX", "XXXX"))
  elections_2015 <- as.data.frame(elections_2015)
  elections_2015$DTTO <- as.character(elections_2015$DISTRITO)
  elections_2015$DISTRITO <- NULL
  
  elections_2015 <- ungroup(elections_2015)
  elections_2015 <- elections_2015 %>% mutate_if(is.integer, funs(replace(., is.na(.), 0)))
  elections_2015$DEL <- as.numeric(elections_2015$DEL)
  elections_2015$DEL_NAME <- factor(elections_2015$DEL, levels = c("10", "2", "14", "3", "4", "15", "5", "6", "7", "8", "16", "9", "11", "12", "17", "13"), 
                                    labels =c("ALVARO OBREGON", "AZCAPOTZALCO","BENITO JUAREZ", "COYOACAN", "CUAJIMALPA DE MORELOS", 
                                              "CUAUHTEMOC","GUSTAVO A. MADERO","IZTACALCO","IZTAPALAPA", "LA MAGDALENA CONTRERAS",
                                              "MIGUEL HIDALGO", "MILPA ALTA ", "TLAHUAC", "TLALPAN", "VENUSTIANO CARRANZA", "XOCHIMILCO"))
  # we have one seccion where the lista nominal is recorded as 0 but there are
  # votes - switch that to NA because it must be wrong
  elections_2015$dmr_LN <- ifelse(elections_2015$dmr_LN ==0, NA, elections_2015$dmr_LN)
  elections_2015$jd_LN <- ifelse(elections_2015$jd_LN ==0, NA, elections_2015$jd_LN)
  
    elections_2015$jd_valid_votes <-rowSums(elections_2015[,c(   "jd_PAN" ,  "jd_PRI" , "jd_PRD" , "jd_PVEM" , "jd_PT" ,"jd_MC", "jd_NA." ,"jd_MORENA","jd_PH" ,
                                                               "jd_ES" , "jd_CI1_4","jd_CI1_5" ,  "jd_CI1_16", "jd_CI1_9" , "jd_CI2_9", "jd_CI3_9"    , "jd_CI4_9" ,
                                                               "jd_PRI_PVEM", "jd_PRD_PT_NA","jd_PRD_PT" ,  "jd_PRD_NA" , "jd_PT_NA" , "jd_CNR" )])
  elections_2015$jd_turnout <-rowSums(elections_2015[,c("jd_valid_votes", "jd_VN")])/elections_2015$jd_LN
  
  elections_2015$dmr_valid_votes <-rowSums(elections_2015[,c("dmr_PAN" , "dmr_PRI",  "dmr_PRD", "dmr_PVEM" , "dmr_PT" , "dmr_MC" , "dmr_NA.",  "dmr_MORENA")])
  elections_2015$dmr_turnout <-rowSums(elections_2015[,c("dmr_valid_votes", "dmr_VN")])/elections_2015$dmr_LN
  
  elections_2015$drp_valid_votes <- rowSums(elections_2015[,c("drp_PAN", "drp_PRI", "drp_PRD", "drp_PVEM", "drp_PT",  "drp_MC" , "drp_NA.",      
                                                              "drp_MORENA", "drp_PH" ,  "drp_ES", "drp_CI1_4", "drp_CI1_5" , "drp_CI1_16" , "drp_CI1_9",    
                                                               "drp_PRI_PVEM", "drp_PRD_PT_NA", "drp_PRD_PT", "drp_PRD_NA", "drp_PT_NA","drp_CNR")])
  
  #jefe delegacional vote shares 
  elections_2015$vspri_jd <-rowSums(elections_2015[, c("jd_PRI","jd_PRI_PVEM")],na.rm = TRUE)/elections_2015$jd_valid_votes
  elections_2015$vspt_jd <-rowSums(elections_2015[, c("jd_PT","jd_PRD_PT_NA","jd_PRD_PT", "jd_PT_NA")],na.rm = TRUE)/elections_2015$jd_valid_votes
  elections_2015$vspan_jd <-elections_2015$jd_PAN/elections_2015$jd_valid_votes
  elections_2015$vsprd_jd <-rowSums(elections_2015[, c("jd_PRD","jd_PRD_PT_NA","jd_PRD_NA" )],na.rm = TRUE)/elections_2015$jd_valid_votes
  elections_2015$vsmorena_jd <- elections_2015$jd_MORENA/elections_2015$jd_valid_votes
  
  #dmr vote shares 
  elections_2015$vspri_dmr <-rowSums(elections_2015[, c("dmr_PRI","dmr_PRI_PVEM")],na.rm = TRUE)/elections_2015$dmr_valid_votes
  elections_2015$vspt_dmr <-rowSums(elections_2015[, c("dmr_PT","dmr_PRD_PT_NA")],na.rm = TRUE)/elections_2015$dmr_valid_votes
  elections_2015$vspan_dmr <-elections_2015$dmr_PAN/elections_2015$dmr_valid_votes
  elections_2015$vsprd_dmr <-rowSums(elections_2015[, c("dmr_PRD","dmr_PRD_PT_NA","dmr_PRD_PT","dmr_PRD_NA" )],na.rm = TRUE)/elections_2015$dmr_valid_votes
  elections_2015$vsmorena_dmr <- elections_2015$dmr_MORENA/elections_2015$dmr_valid_votes
  
  #drp vote shares 
  elections_2015$vspri_drp <-rowSums(elections_2015[, c("drp_PRD","drp_PRD_PT_NA", "drp_PRD_PT", "drp_PRD_NA")],na.rm = TRUE)/elections_2015$drp_valid_votes
  elections_2015$vspt_drp <-rowSums(elections_2015[, c("drp_PT","drp_PRD_PT_NA", "drp_PT_NA")],na.rm = TRUE)/elections_2015$drp_valid_votes
  elections_2015$vspan_drp <-elections_2015$drp_PAN/elections_2015$drp_valid_votes
  elections_2015$vsprd_drp <-rowSums(elections_2015[, c("drp_PRD","drp_PRD_PT","drp_PRD_NA")],na.rm = TRUE)/elections_2015$drp_valid_votes
  elections_2015$vsmorena_drp <- elections_2015$drp_MORENA/elections_2015$drp_valid_votes
  
  # read  in aldf fpp (dmr)
  dmr_2015 <- read_csv("chapter_5_rewarding_responsiveness/elections_raw/dmr_2015_outcomes.csv") %>% 
    select(Distrito,Partido) %>% 
    rename(DTTO=Distrito,winner_dmr = Partido)
  
  elections_2015 <- left_join(elections_2015, dmr_2015,by = "DTTO")
  
  elections_2015 <- elections_2015[,c("SECC", "DEL_NAME", "DEL", "year", "vspri_jd","vspt_jd", "vspan_jd", "vsprd_jd", "vsmorena_jd",  "vspri_dmr","vspt_dmr", "vspan_dmr", "vsprd_dmr",  "vsmorena_dmr", "vspri_drp","vspt_drp", "vspan_drp", "vsprd_drp", "vsmorena_drp", "winner_dmr", "dmr_valid_votes", "jd_valid_votes", "jd_turnout", "dmr_turnout")]
  rm(dmr15_secc, drp15_secc, jd15_secc, dmr_2015)
  elections_2015$SECC<- as.character(elections_2015$SECC)
  
  #########
  #2018
  #########
  #dmr
  dmr18_cas <- read.csv("chapter_5_rewarding_responsiveness/elections_raw/dmr_18_cas.csv", skip = 8, stringsAsFactors = FALSE)
  colnames(dmr18_cas)[10:48] <- paste("dmr", colnames(dmr18_cas)[10:48], sep = "_")
  colnames(dmr18_cas)[6] <- "SECC"
  #get rid of "section 0" which is total
  dmr18_cas[dmr18_cas$SECC == 0,] <- NULL
  #get to seccion (Rather than casilla)
  dmr18_cas <- dmr18_cas[,c(5,6,10:45)]
  dmr_18_secc <- dmr18_cas %>% group_by(Distrito, SECC) %>% summarise_all(funs(sum))
  
  #alcaldias (still name the file jd for consistency, but they're really alcaldes)
  alc18_cas <- read.csv("chapter_5_rewarding_responsiveness/elections_raw/alc_2018_cas.csv", skip = 8, stringsAsFactors = FALSE)
  colnames(alc18_cas)[10:39] <- paste("jd", colnames(alc18_cas)[10:39], sep = "_")
  colnames(alc18_cas)[6] <- "SECC"
  #get rid of "section 0" which is total
  alc18_cas[alc18_cas$SECC == 0,] <- NULL
  #get to seccion (Rather than casilla)
  alc18_cas <- alc18_cas[,c(5,6,10:39)]
  jd_18_secc <- alc18_cas %>% group_by(Distrito, SECC) %>% summarise_all(funs(sum))
  
  #JG
  jg18_cas <- read.csv("chapter_5_rewarding_responsiveness/elections_raw/jg_18_cas.csv", skip = 8, stringsAsFactors = FALSE)
  colnames(jg18_cas)[10:38] <- paste("jg", colnames(jg18_cas)[10:38], sep = "_")
  colnames(jg18_cas)[6] <- "SECC"
  jg18_cas <- jg18_cas[2:12976,]
  #get to seccion (Rather than casilla)
  jg18_cas <- jg18_cas[,c(5,6,10:35)]
  jg_18_secc <- jg18_cas %>% group_by(Distrito, SECC) %>% summarise_all(funs(sum))
  
  elections_2018 <- list(dmr_18_secc,  jd_18_secc, jg_18_secc) %>% reduce(left_join, by = c("Distrito", "SECC"))
  elections_2018$year <- 2018
  elections_2018$DISTRITO <- NA
  colnames(elections_2018)[1] <- "Distrito_18"
  elections_2018 <- as.data.frame(elections_2018)
  elections_2018 <- ungroup(elections_2018)
  elections_2018 <- elections_2018 %>% mutate_if(is.integer, funs(replace(., is.na(.), 0)))
  #get delegations for 2018
  sec_del <- unique(elections_2015[,c("SECC", "DEL", "DEL_NAME")])
  sec_del$SECC <- as.numeric(sec_del$SECC)
  elections_2018 <- left_join(elections_2018, sec_del, by = "SECC")
  rm(list = c("jd03_cas", "jd03_secc", "jd06_secc", "jd09_secc", "jd12_secc", "jd15_secc", "dmr03_cas", "dmr03_secc", "dmr06_secc", "dmr09_secc", "dmr12_secc", "dmr15_secc", "drp03_cas", "drp03_secc", "drp06_secc",   "drp09_secc", "drp12_secc", "drp15_secc", "jg06_secc", "jg12_secc", "alc18_cas",  "jd_18_secc", "jg18_cas", "jg_18_secc", "dmr_18_secc", "dmr18_cas" ))
  # read in lista nominal, which is separate. Note that we can only get the
  # lista nominal for jefe de gobierno. The list should be effectively identical
  # (even if turnout is different), so I'm using it as the denominator for both
  # jg and dmr, although in theory they could be slightly different
  
  ln_18 <- read_csv("chapter_5_rewarding_responsiveness/elections_raw/jg_ln_2018.csv", col_names = FALSE) %>% 
    rename(SECC = X1, percent = X2, LN = X3) %>% select(SECC, LN)
  ln_18$SECC <- str_replace(ln_18$SECC, "Secc. ", "") %>% as.numeric()
  elections_2018 <- left_join(elections_2018, ln_18, by = "SECC")
  
  elections_2018$jd_valid_votes <- elections_2018$jd_total.personas.votaron - elections_2018$jd_Nulos
  elections_2018$jd_turnout <- rowSums(elections_2018[,c("jd_valid_votes","jd_Nulos")],na.rm=TRUE)/elections_2018$LN
  
  elections_2018$dmr_valid_votes <- elections_2018$dmr_total.personas.votaron - elections_2018$dmr_Nulos
  elections_2018$dmr_turnout <- rowSums(elections_2018[,c("dmr_valid_votes","dmr_Nulos")],na.rm=TRUE)/elections_2018$LN
  
  elections_2018$jg_valid_votes <- elections_2018$jg_total.personas.votaron - elections_2018$jg_Nulos
  
  
  # check if votes > ln
  elections_2018$off <- ifelse(elections_2018$jd_total.personas.votaron > elections_2018$LN|elections_2018$dmr_total.personas.votaron > elections_2018$LN ,1,0)
  elections_2018$dmr_turnout <- ifelse(elections_2018$off ==1, NA, elections_2018$dmr_turnout)
  elections_2018$jd_turnout <- ifelse(elections_2018$off ==1, NA, elections_2018$jd_turnout)
  
  #jefe delegacional vote shares 
  #note that in 2018, some of these coalitions begin to be shared across the major parties; so vote shares will total more than 100%. think more about this
  #this won't have a huge impact on incumbment vote share, but it makes using the parties tough
  elections_2018$vspri_jd <-elections_2018$jd_PRI/elections_2018$jd_valid_votes
  elections_2018$vspan_jd <-rowSums(elections_2018[, c("jd_PAN","jd_Coalición..PAN.PRD.MC.", "jd_Coalición..PAN.PRD.", "jd_Coalición..PAN.MC.")],na.rm = TRUE)/elections_2018$jd_valid_votes
  elections_2018$vspt_jd <-rowSums(elections_2018[, c("jd_PT","jg_C.Común..PT.MORENA.PES.", "jg_C.Común..PT.MORENA.", "jg_C.Común..PT.PES.")],na.rm = TRUE)/elections_2018$jd_valid_votes
  elections_2018$vsprd_jd <-rowSums(elections_2018[, c("jd_PRD", "jd_Coalición..PAN.PRD.MC.", "jd_Coalición..PAN.PRD.", "jd_Coalición..PRD.MC.")],na.rm = TRUE)/elections_2018$jd_valid_votes
  elections_2018$vsmorena_jd <- rowSums(elections_2018[, c("jd_MORENA", "jd_C.Común..PT.MORENA.PES.", "jd_C.Común..PT.MORENA.","jd_C.Común..MORENA.PES." )],na.rm = TRUE)/elections_2018$jd_valid_votes
  
  #jefe de gobierno vote shares 
  elections_2018$vspan_jg <-rowSums(elections_2018[, c("jg_PAN", "jg_Coalición..PAN.PRD.MC.","jg_Coalición..PAN.PRD.", "jg_Coalición..PAN.MC.")],na.rm = TRUE)/elections_2018$jg_valid_votes
  elections_2018$vspt_jg <-rowSums(elections_2018[, c("jg_PT", "jg_Coalición..PAN.PRD.MC.","jg_Coalición..PAN.PRD.", "jg_Coalición..PAN.MC.")],na.rm = TRUE)/elections_2018$jg_valid_votes
  elections_2018$vspri_jg <-elections_2018$jg_PRI/elections_2018$jg_valid_votes
  elections_2018$vsprd_jg <-rowSums(elections_2018[, c("jg_PRD", "jg_Coalición..PAN.PRD.MC.", "jg_Coalición..PAN.PRD.", "jg_Coalición..PRD.MC.")],na.rm = TRUE)/elections_2018$jg_valid_votes
  elections_2018$vsmorena_jg <- rowSums(elections_2018[, c("jg_MORENA", "jg_C.Común..PT.MORENA.PES.", "jg_C.Común..PT.MORENA.", "jg_C.Común..MORENA.PES.")],na.rm = TRUE)/elections_2018$jg_valid_votes
  
  #dmr vote shares 
  elections_2018$vspan_dmr <-rowSums(elections_2018[, c("dmr_PAN", "dmr_Coalición..PAN.PRD.MC.","dmr_Coalición..PAN.PRD.", "dmr_Coalición..PAN.MC.")],na.rm = TRUE)/elections_2018$dmr_valid_votes
  elections_2018$vspt_dmr <-rowSums(elections_2018[, c("dmr_PT", "dmr_C.Común..PT.MORENA.PES.","dmr_C.Común..PT.MORENA.", "dmr_C.Común..PT.PES.")],na.rm = TRUE)/elections_2018$dmr_valid_votes
  elections_2018$vspri_dmr <-elections_2018$dmr_PRI/elections_2018$dmr_valid_votes
  elections_2018$vsprd_dmr <-rowSums(elections_2018[, c("dmr_PRD", "dmr_C.Común..PRD.MC.", "dmr_Coalición..PAN.PRD.MC.", "dmr_Coalición..PAN.PRD.","dmr_Coalición..PRD.MC.")],na.rm = TRUE)/elections_2018$dmr_valid_votes
  elections_2018$vsmorena_dmr <- rowSums(elections_2018[, c("dmr_C.Común..PT.MORENA.PES.", "dmr_MORENA", "dmr_C.Común..PT.MORENA.", "dmr_C.Común..MORENA.PES.")],na.rm = TRUE)/elections_2018$dmr_valid_votes
  
  
  # read in aldf fpp (dmr)
  dmr_2018 <-read_csv("chapter_5_rewarding_responsiveness/elections_raw/dmr_2018_outcomes.csv") %>% 
    select(Distrito_18,Partido) %>% 
    rename(winner_dmr=Partido)
  
  elections_2018 <-left_join(elections_2018,dmr_2018, by="Distrito_18")
  
  #combining all types
  elections_2018$year <- 2018
  elections_2018 <- elections_2018[,c("SECC", "year",  "vspri_jd","vspt_jd", "vspan_jd", "vsprd_jd", "vsmorena_jd", "vspri_jg","vspt_jg", "vspan_jg", "vsprd_jg", "vsmorena_jg",  "vspri_dmr","vspt_dmr", "vspan_dmr", "vsprd_dmr", "vsmorena_dmr", "winner_dmr", "dmr_valid_votes", "jd_valid_votes", "jg_valid_votes", "jd_turnout", "dmr_turnout")]
  elections_2018$SECC<- as.character(elections_2018$SECC)
   rm(sec_del, dmr_2018)
  
   
  ####Combine all years prior to 2021 #####

  elections <- bind_rows(elections_2003, elections_2006, elections_2009, elections_2012, elections_2015, elections_2018)
  elections$cve_secc <-str_pad(elections$SECC, 4, "left", pad = "0")
  elections$SECC<- NULL

  rm(list = c("elections_2003", "elections_2006", "elections_2009", "elections_2012", "elections_2015", "elections_2018"))
  
  
  ########################################################################
          #Other edits: add jefe delegacional for alignment, 
          # calculate vote margin, create incumbent vote share
  #######################################################################  
  #read in jefe delegacional (for alignment)
  jd <- read.csv("chapter_5_rewarding_responsiveness/elections_raw/jefes_delegacionales.csv")
  jd$PARTIDO_JD[jd$PARTIDO_JD == "P.A.N."] <- "PAN"
  jd$PARTIDO_JD[jd$PARTIDO_JD == "P.R.I."] <- "PRI"
  jd$PARTIDO_JD[jd$PARTIDO_JD == "P.R.D."] <- "PRD"
  jd$PARTIDO_JD <- factor(jd$PARTIDO_JD, levels = c("MORENA", "PAN", "PRD", "PRI", "PT"), labels = 
                            c("MORENA", "PAN", "PRD", "PRI", "PT"))
  #join jefe delegacional to electoral sections 
  elections <- left_join(elections, jd, by = c("DEL", "year"))
  elections <- data.frame(elections)
  rm(jd)
  
  #Calculate Vote Margin for JD and JG 
  #Electoral Margin
  #jd - note that we break this up and calculate it pre and post morena's entry 
  
  margins <- elections
  margins$max_jd <- NA
  margins[(margins$year == 2003 | margins$year == 2006|margins$year == 2009|margins$year == 2012),]$max_jd <- apply(margins[(margins$year == 2003 | margins$year == 2006|margins$year == 2009|margins$year == 2012),c("vsprd_jd", "vspri_jd", "vspan_jd")],1,max)
  margins[(margins$year == 2015 | margins$year == 2018),]$max_jd <- apply(margins[(margins$year == 2015| margins$year == 2018),c("vsprd_jd", "vspri_jd", "vspan_jd", "vsmorena_jd")],1,max)
  
  second_max <- function(x){
    return(max(x[x!=max(x)]))
  }
  
  margins$second_max_jd <- NA
  margins[(margins$year == 2003 | margins$year == 2006|margins$year == 2009|margins$year == 2012),]$second_max_jd <- apply(margins[(margins$year == 2003 | margins$year == 2006|margins$year == 2009|margins$year == 2012),c("vsprd_jd", "vspri_jd", "vspan_jd")],1,second_max)
  margins[(margins$year == 2015 | margins$year == 2018),]$second_max_jd <- apply(margins[(margins$year == 2015| margins$year == 2018),c("vsprd_jd", "vspri_jd", "vspan_jd", "vsmorena_jd")],1,second_max)
  margins$margin_jd <- (margins$max_jd - margins$second_max_jd)
  
  #jg - note that we only calculate this for years where a JG was elected
  margins$max_jg <- NA
  margins$second_max_jg <- NA
  margins[(margins$year == 2006|margins$year == 2012),]$max_jg <- apply(margins[(margins$year == 2006| margins$year == 2012),c("vsprd_jg", "vspri_jg", "vspan_jg")],1, max)
  margins[margins$year == 2018,]$max_jg <- apply(margins[(margins$year == 2018),c("vsprd_jg", "vspri_jg", "vspan_jg", "vsmorena_jg")],1, max)
  margins[(margins$year == 2006|margins$year == 2012),]$second_max_jg  <- apply(margins[(margins$year == 2006| margins$year == 2012),c("vsprd_jg", "vspri_jg", "vspan_jg")],1,second_max)
  margins[margins$year == 2018,]$second_max_jg  <- apply(margins[margins$year == 2018,c("vsprd_jg", "vspri_jg", "vspan_jg", "vsmorena_jg")],1,second_max)
  margins$margin_jg <- (margins$max_jg - margins$second_max_jg)
  margins <- margins[,c("cve_secc", "year", "margin_jd", "margin_jg" )]
  elections <- left_join(elections, margins, by = c("cve_secc", "year"))
  rm(margins)
  
  #Make vote share data tidy so that IVS is easy 
  vote_shares_jd <- elections %>% 
    select(year, cve_secc, vspri_jd, vspan_jd, vsprd_jd, vsmorena_jd, vspt_jd) %>%
    pivot_longer(cols = c(vspri_jd, vspan_jd, vsprd_jd, vsmorena_jd, vspt_jd),names_to = "party", names_prefix = "vs", values_to = "vote_share")
    vote_shares_jd$party <- str_replace(vote_shares_jd$party, pattern = "_jd",replacement= "")

  vote_shares_jg <- elections %>% 
    select(year, cve_secc, vspri_jg, vspan_jg, vsprd_jg, vsmorena_jg, vspt_jg) %>%
    pivot_longer(cols = c(vspri_jg, vspan_jg, vsprd_jg, vsmorena_jg, vspt_jg),names_to = "party", names_prefix = "vs", values_to = "vote_share")
    vote_shares_jg$party <- str_replace(vote_shares_jg$party, pattern = "_jg",replacement= "") 
  
  vote_shares_dmr <- elections %>% 
      select(year, cve_secc,  vspri_dmr, vspan_dmr, vsprd_dmr, vsmorena_dmr, vspt_dmr) %>%
      pivot_longer(cols = c(vspri_dmr, vspan_dmr, vsprd_dmr, vsmorena_dmr, vspt_dmr),names_to = "party", names_prefix = "vs", values_to = "vote_share")
    vote_shares_dmr$party <- str_replace(vote_shares_dmr$party, pattern = "_dmr",replacement= "") 
   
  # generate incumbent vote share  
    
    # get incumbent dmr.This is a little trickier than the others because we
    # need to use the right districts.
    
    winners_dmr <- elections %>% group_by(cve_secc, year) %>% dplyr::summarize(first(winner_dmr))  
   
    # handoff- winner in 2003 becomes incumbent in 2006
    winners_dmr$year <-case_when(
      winners_dmr$year==2003~2006,
      winners_dmr$year==2006~2009,
      winners_dmr$year==2009~2012,
      winners_dmr$year==2012~2015,
      winners_dmr$year==2015~2018,
      winners_dmr$year==2018~2021
    ) 
    colnames(winners_dmr)<- c("cve_secc","year","incumbent_dmr")
    
    # since we don't have the 2000 districts by seccion, we use the 2003 seccion
    inc_2003 <- read_csv("chapter_5_rewarding_responsiveness/elections_raw/dmr_2000_outcomes.csv") %>% 
      select(Distrito,Partido) %>%
      rename(DTO_LOC=Distrito,incumbent_dmr=Partido) %>%
      mutate(year=2003)
    secciones_2003 <- read.dbf("chapter_5_rewarding_responsiveness/elections_raw/drp03_cas.DBF") %>%
      select(SECCION,DTO_LOC) %>% 
      group_by(SECCION) %>% 
      summarise_all(funs(first)) %>% 
      left_join(.,inc_2003, by= "DTO_LOC") 
    secciones_2003$cve_secc<- str_pad(secciones_2003$SECCION, 4, "left", pad = "0")
    secciones_2003 <- secciones_2003 %>% select(cve_secc,year,incumbent_dmr)
    winners_dmr<-bind_rows(winners_dmr, secciones_2003)
    rm(inc_2003,secciones_2003)
  
  vote_shares_dmr <-left_join(vote_shares_dmr, winners_dmr,by =c("year","cve_secc"))  %>% 
  mutate(incumbent_dmr=tolower(incumbent_dmr))
  
  # clean up incumbency: assign incumbent status to prd in all coalitions where
  # it's present,and to pt inthe psd-pt coalition
  
  
  vote_shares_dmr$incumbent_dmr <-case_when(
    vote_shares_dmr$incumbent_dmr=="prd-pt" ~ "prd",
    vote_shares_dmr$incumbent_dmr=="prd-pt-convergencia"~ "prd",
    vote_shares_dmr$incumbent_dmr =="prd-pt-na"~"prd",
    vote_shares_dmr$incumbent_dmr=="pri-pvem"~"pri",
    vote_shares_dmr$incumbent_dmr=="psd-pt"~ "pt",
    TRUE~vote_shares_dmr$incumbent_dmr
    )
  
  ivs_dmr <-vote_shares_dmr[vote_shares_dmr$party==vote_shares_dmr$incumbent_dmr,] %>% 
    select(year,cve_secc,incumbent_dmr,vote_share) %>% 
    rename(ivs_dmr=vote_share)
  
  #JD
  ivs_jd <- elections %>%
    group_by(cve_secc) %>% 
    mutate(incumbent_jd = tolower(ifelse(year == 2003,
                                 "PRD",
                                 paste(dplyr::lag(PARTIDO_JD,1,order_by = year, default = NA))))) %>% 
    select(cve_secc, year, incumbent_jd) %>%
    rename(party = incumbent_jd) %>% 
    left_join(., vote_shares_jd, by = c("cve_secc", "year", "party")) %>% 
    rename(ivs_jd = vote_share, incumbent_jd=party)
   
  ivs_jg <- elections %>%
    mutate(incumbent_jg = "prd") %>% 
    select(cve_secc, year, incumbent_jg) %>%
    rename(party = incumbent_jg) %>% 
    left_join(., vote_shares_jg, by = c("cve_secc", "year", "party")) %>% 
    rename(ivs_jg = vote_share, incumbent_jg =party)
  
  
  elections <- left_join(elections, ivs_jg[c("cve_secc", "year", "ivs_jg","incumbent_jg")], by = c("cve_secc", "year")) %>% 
              left_join(.,ivs_jd[c("cve_secc", "year", "ivs_jd","incumbent_jd")], by = c("cve_secc", "year")) %>% 
              left_join(.,ivs_dmr[c("cve_secc", "year", "ivs_dmr","incumbent_dmr")], by = c("cve_secc", "year")) 

  elections$winner_dmr<-NULL
  
  elections %>%  mutate(miss_jg = ifelse(is.na(ivs_jg),1,0),
           miss_jd = ifelse(is.na(ivs_jd),1,0),
           miss_dmr = ifelse(is.na(ivs_dmr),1,0)) %>% 
    group_by(year) %>% 
    dplyr::summarize(miss_jg = sum(miss_jg),
                     miss_jd = sum(miss_jd),
                     miss_dmr =sum(miss_dmr))


  # Add in 2021 separately because it has its own logic for the incumbent party because of party coalitions
  rm(list=setdiff(ls(), "elections"))
  #########
  #2021
  #########
  # read in lista nominal 
  ln_2021 <- read_csv("chapter_5_rewarding_responsiveness/elections_raw/lista_nominal_seccion.csv")
  ln_2021 <- ln_2021 %>% clean_names() %>% 
    filter(demarcacion!="Total") %>% 
    select(seccion_electoral, lista_nominal) %>% 
    rename(seccion=seccion_electoral)
  # read in casilla level jd (alcalde, but keeping it jd for consistency) 
  jd_2021 <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/ResultadosAL-2021-06-10-16-09-14.xlsx", range = "A11:AK13187") %>% clean_names()
  # summarize to seccion level
  vars_to_sum <- colnames(jd_2021[,5:37])
  jd_2021 <- jd_2021 %>% 
    group_by(seccion,demarcacion, distrito) %>% summarize_at(vars_to_sum,sum) %>% ungroup()
  
  # check
  #jd_2021$total_votes <- jd_2021 %>% select(c(4:29, 34,35)) %>% rowSums(na.rm=TRUE)
  # the "total" categories are some kind of aggregate that I can't figure out how to decompose; get rid of them to be cautious 
  jd_2021 <- jd_2021 %>% select(-c("vot_total_31", "vot_total_32", "vot_total_33","vot_total_34"))
  colnames(jd_2021)[4:32] <- paste("jd", colnames(jd_2021)[4:32], sep = "_")
  
  # read in dmr
  dmr_2021 <- read_excel("chapter_5_rewarding_responsiveness/elections_raw/ResultadosMR-2021-06-10-12-34-30.xlsx", range = "A11:AM13187") %>% clean_names()
  # summarize to seccion level
  vars_to_sum <- colnames(dmr_2021[,4:39])
  dmr_2021 <- dmr_2021 %>% 
    group_by(seccion, distrito) %>% summarize_at(vars_to_sum,sum) %>% ungroup()
  # the "total" categories are some kind of aggregate that I can't figure out how to decompose; get rid of them to be cautious 
  dmr_2021 <- dmr_2021 %>% select(-c("vot_total_32", "vot_total_33", "vot_total_34","vot_total_35", "vot_total_36"))
  colnames(dmr_2021)[3:33] <- paste("dmr", colnames(dmr_2021)[3:33], sep = "_")
  
  elections_2021 <- list(dmr_2021,   jd_2021) %>% reduce(left_join, by = c("distrito", "seccion"))
  elections_2021 <- left_join(elections_2021, ln_2021, by ="seccion")
  
  elections_2021$jd_valid_votes <- elections_2021$jd_votacion_total_emitida - elections_2021$jd_votos_nulos
  elections_2021$jd_turnout <- rowSums(elections_2021[,c("jd_valid_votes","jd_votos_nulos")],na.rm=TRUE)/elections_2021$lista_nominal
  
  elections_2021$dmr_valid_votes <- elections_2021$dmr_votacion_total_emitida - elections_2021$dmr_votos_nulos
  elections_2021$dmr_turnout <- rowSums(elections_2021[,c("dmr_valid_votes","dmr_votos_nulos")],na.rm=TRUE)/elections_2021$lista_nominal
  
  # check if votes > ln
  elections_2021$off <- ifelse(elections_2021$jd_votacion_total_emitida > elections_2021$lista_nominal|elections_2021$dmr_votacion_total_emitida > elections_2021$lista_nominal ,1,0)
  elections_2021$dmr_turnout <- ifelse(elections_2021$off ==1, NA, elections_2021$dmr_turnout)
  elections_2021$jd_turnout <- ifelse(elections_2021$off ==1, NA, elections_2021$jd_turnout)
  #save(elections_2021, file = "2_Data/2_Elections/results_2021/vote_tallies_2021.Rdata")
  # here we don't calculate vote share for individual parties because the
  # coalitions become tricky. Instead we just do the incumbent vote share in
  # each alcaldia. I'm also skipping the dmr for now because the focus is on
  # alcaldia. Note that when the parties run in coalition such that the
  # "incumbent" party doesn't run, i count the votes for their coalition partner
  # (so if morena doesn't field a candidate in a place where there is a morena
  # incumbent, i count Pt votes)
  
  # OJO that Julio Cesar becomes MORENA so I'm considering a vote for MORENA a vote for the incumbent in VC
  
  elections_2021$cve_secc <- str_pad(elections_2021$seccion, 3, "left", "0")
  elections_2021$ivs_jd <- case_when(
    elections_2021$demarcacion == "BENITO JUÁREZ" ~  rowSums(elections_2021[, c("jd_pan", "jd_pan_pri_prd", "jd_pan_pri")],na.rm = TRUE)/elections_2021$jd_valid_votes,
    elections_2021$demarcacion == "COYOACÁN" ~ rowSums(elections_2021[, c("jd_prd", "jd_pan_pri_prd", "jd_pan_pri")],na.rm = TRUE)/elections_2021$jd_valid_votes,
    elections_2021$demarcacion == "MILPA ALTA" ~ elections_2021$jd_mc/elections_2021$jd_valid_votes,
   TRUE ~  rowSums(elections_2021[, c("jd_morena", "jd_pvem_morena", "jd_pt_morena", "jd_pvem_pt", "jd_pvem_pt_morena")],na.rm = TRUE)/elections_2021$jd_valid_votes
  )
  elections_2021$jd <- case_when(
    elections_2021$demarcacion == "BENITO JUÁREZ" ~ "pan",
    elections_2021$demarcacion == "COYOACÁN" ~ "prd",
    elections_2021$demarcacion == "MILPA ALTA" ~ "mc",
    elections_2021$demarcacion == "CUAJIMALPA DE MORELOS" ~ "pri",
    TRUE ~  "morena"
  )
  elections_2021$year <- 2021
  elections_2021$DEL <- as.character(factor(elections_2021$demarcacion, 
                               levels = c("ÁLVARO OBREGÓN", "AZCAPOTZALCO", "BENITO JUÁREZ", "COYOACÁN", "CUAJIMALPA DE MORELOS", 
                                          "CUAUHTÉMOC", "GUSTAVO A. MADERO", "IZTACALCO", "IZTAPALAPA", "LA MAGDALENA CONTRERAS",
                                          "MIGUEL HIDALGO", "MILPA ALTA", "TLÁHUAC", "TLALPAN", "VENUSTIANO CARRANZA", "XOCHIMILCO"), 
                               labels =c(10,2,14,3,4,15,5,6,7,8,16,9,11,12,17,13)))
  elections_2021 <- elections_2021 %>% select(cve_secc, DEL, year,ivs_jd)
  
  elections$DEL <-as.character(elections$DEL)
  elections <- bind_rows(elections, elections_2021)
  
  # keep only jefe delegacional/alcalde results and clean names
  elections <- elections %>%  select(year, vspri_jd, vspt_jd, vspan_jd, vsprd_jd, vsmorena_jd, 
                                     jd_valid_votes, jd_turnout, cve_secc,margin_jd, ivs_jd, incumbent_jd) %>% 
                                      clean_names()
  
  save(elections, file = "chapter_5_rewarding_responsiveness/elections.Rdata")
  
  

   